library(tidyverse)
library(readxl)
library(janitor)
library(scales)
library(kableExtra)
v2.1 Change: 9 programs across all 6 SHRS departments, each with a unique SOC code. SS (Exercise Physiologists) restored; OT-CScD duplicate removed.
# —— EDIT THIS ONE LINE to match your machine ——
MARKET_ROOT <- file.path(path.expand("~"), "Documents", "SHRS_Analysis",
"market_analysis")
# Skills data paths
BLS_SKILLS_PATH <- file.path(MARKET_ROOT, "skills", "bls", "skills.xlsx")
ONET_SKILLS_PATH <- file.path(MARKET_ROOT, "skills", "onet", "Skills-2.xlsx")
ONET_EDU_PATH <- file.path(MARKET_ROOT, "skills", "onet",
"Education__Training__and_Experience.xlsx")
# Full crosswalk (9 programs, 9 unique SOC codes)
soc_crosswalk <- tribble(
~shrs_program, ~shrs_dept, ~soc_code, ~occupation_title, ~pitt_degree,
"SLP", "CSD", "29-1127", "Speech-Language Pathologists", "Master's",
"AuD", "CSD", "29-1181", "Audiologists", "Doctoral",
"HIM", "HIM", "29-9021", "Health Information Technologists","Master's",
"OTD", "OT", "29-1122", "Occupational Therapists", "Doctoral",
"DPT", "PT", "29-1123", "Physical Therapists", "Doctoral",
"PAS", "PAS", "29-1071", "Physician Assistants", "Master's",
"AT", "SMN", "29-9091", "Athletic Trainers", "Master's",
"DN", "SMN", "29-1031", "Dietitians and Nutritionists", "Master's",
"SS", "SMN", "29-1128", "Exercise Physiologists", "Master's"
)
# All SOC codes are unique — no dedup needed
target_socs <- soc_crosswalk$soc_code
# Color palette for all 9 programs
program_colors <- c(
"SLP" = "#2c7bb6", "AuD" = "#abd9e9",
"HIM" = "#756bb1",
"OTD" = "#e6550d",
"DPT" = "#31a354",
"PAS" = "#de2d26",
"AT" = "#fdae61", "DN" = "#d7191c",
"SS" = "#1a9641"
)
BLS Table 6.5 provides percentile ranks (0–100) across 17 skill dimensions for each occupation. Higher = that skill is more important relative to all other occupations in the economy.
bls_skills_wide <- read_excel(BLS_SKILLS_PATH, sheet = "Table 6.5", skip = 1,
col_types = "text") |>
clean_names()
# Rename first two columns by position
names(bls_skills_wide)[1] <- "occupation_title"
names(bls_skills_wide)[2] <- "soc_code"
# Filter to our targets and pivot long
bls_skills <- bls_skills_wide |>
filter(soc_code %in% target_socs) |>
pivot_longer(cols = -c(occupation_title, soc_code),
names_to = "skill", values_to = "percentile") |>
mutate(
percentile = as.numeric(percentile),
skill = str_replace_all(skill, "_", " ") |> str_to_title()
) |>
left_join(soc_crosswalk |> select(shrs_program, shrs_dept, soc_code, occupation_title), by = "soc_code")
bls_skills |>
ggplot(aes(x = shrs_program, y = reorder(skill, percentile), fill = percentile)) +
geom_tile(color = "white", linewidth = 0.5) +
geom_text(aes(label = percentile), size = 2.8, color = "black") +
scale_fill_gradient2(low = "#d73027", mid = "#fee08b", high = "#1a9850",
midpoint = 50, limits = c(0, 100),
name = "Percentile\nRank") +
labs(
title = "BLS Skill Percentile Ranks by SHRS Program",
subtitle = "0 = lowest across all occupations; 100 = highest | 9 programs, 9 unique SOC codes",
x = NULL, y = NULL
) +
theme_minimal(base_size = 12) +
theme(axis.text.x = element_text(face = "bold", angle = 30, hjust = 1),
panel.grid = element_blank())
bls_skills |>
ggplot(aes(x = reorder(skill, percentile), y = percentile,
fill = shrs_program)) +
geom_col(width = 0.7) +
geom_hline(yintercept = 50, linetype = "dashed", color = "gray50") +
coord_flip() +
facet_wrap(~ shrs_program, ncol = 2) +
scale_fill_manual(values = program_colors) +
scale_y_continuous(limits = c(0, 100)) +
labs(
title = "Skill Profiles by SHRS Program (BLS Percentile Ranks)",
subtitle = "Dashed line = 50th percentile (median across all occupations)",
x = NULL, y = "Percentile Rank (0–100)"
) +
theme_minimal(base_size = 10) +
theme(legend.position = "none")
bls_skills |>
group_by(shrs_program) |>
slice_max(percentile, n = 3) |>
arrange(shrs_program, desc(percentile)) |>
select(Program = shrs_program, Skill = skill, Percentile = percentile) |>
kable() |>
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
| Program | Skill | Percentile |
|---|---|---|
| AT | Adaptability | 94 |
| AT | Interpersonal | 94 |
| AT | Problem Solving And Decision Making | 92 |
| AuD | Customer Service | 98 |
| AuD | Interpersonal | 98 |
| AuD | Computers And Information Technology | 96 |
| DN | Interpersonal | 92 |
| DN | Science | 92 |
| DN | Project Management | 87 |
| DN | Writing And Reading | 87 |
| DPT | Interpersonal | 98 |
| DPT | Problem Solving And Decision Making | 95 |
| DPT | Customer Service | 92 |
| HIM | Computers And Information Technology | 99 |
| HIM | Mathematics | 89 |
| HIM | Critical And Analytical Thinking | 86 |
| OTD | Interpersonal | 98 |
| OTD | Adaptability | 96 |
| OTD | Creativity And Innovation | 90 |
| OTD | Problem Solving And Decision Making | 90 |
| PAS | Adaptability | 100 |
| PAS | Interpersonal | 99 |
| PAS | Problem Solving And Decision Making | 98 |
| SLP | Adaptability | 99 |
| SLP | Interpersonal | 97 |
| SLP | Critical And Analytical Thinking | 93 |
| SLP | Problem Solving And Decision Making | 93 |
| SS | Interpersonal | 94 |
| SS | Writing And Reading | 87 |
| SS | Science | 86 |
Which skills most differentiate each program from the average of the other programs?
n_other <- length(unique(target_socs)) - 1
bls_diff <- bls_skills |>
group_by(skill) |>
mutate(
others_mean = (sum(percentile) - percentile) / n_other
) |>
ungroup() |>
mutate(diff_from_others = percentile - others_mean)
bls_diff |>
group_by(shrs_program) |>
slice_max(abs(diff_from_others), n = 5) |>
arrange(shrs_program, desc(diff_from_others)) |>
select(Program = shrs_program, Skill = skill,
Percentile = percentile, `Others Avg` = others_mean,
`Difference` = diff_from_others) |>
mutate(across(where(is.numeric), ~ round(.x, 1))) |>
kable() |>
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
| Program | Skill | Percentile | Others Avg | Difference |
|---|---|---|---|---|
| AT | Detail Oriented | 83 | 46.8 | 36.2 |
| AT | Physical Strength And Stamina | 70 | 40.8 | 29.2 |
| AT | Critical And Analytical Thinking | 63 | 85.9 | -22.9 |
| AT | Writing And Reading | 62 | 86.1 | -24.1 |
| AT | Mathematics | 16 | 61.5 | -45.5 |
| AuD | Mechanical | 64 | 27.8 | 36.2 |
| AuD | Customer Service | 98 | 67.4 | 30.6 |
| AuD | Computers And Information Technology | 96 | 69.1 | 26.9 |
| AuD | Fine Motor | 60 | 33.4 | 26.6 |
| AuD | Project Management | 84 | 60.1 | 23.9 |
| DN | Mathematics | 85 | 52.9 | 32.1 |
| DN | Adaptability | 55 | 83.0 | -28.0 |
| DN | Physical Strength And Stamina | 16 | 47.5 | -31.5 |
| DN | Detail Oriented | 21 | 54.5 | -33.5 |
| DN | Fine Motor | 4 | 40.4 | -36.4 |
| DPT | Physical Strength And Stamina | 80 | 39.5 | 40.5 |
| DPT | Fine Motor | 58 | 33.6 | 24.4 |
| DPT | Customer Service | 92 | 68.1 | 23.9 |
| DPT | Leadership | 84 | 67.5 | 16.5 |
| DPT | Computers And Information Technology | 51 | 74.8 | -23.8 |
| HIM | Adaptability | 43 | 84.5 | -41.5 |
| HIM | Physical Strength And Stamina | 4 | 49.0 | -45.0 |
| HIM | Customer Service | 29 | 76.0 | -47.0 |
| HIM | Problem Solving And Decision Making | 39 | 86.5 | -47.5 |
| HIM | Interpersonal | 36 | 96.2 | -60.2 |
| OTD | Creativity And Innovation | 90 | 60.1 | 29.9 |
| OTD | Adaptability | 96 | 77.9 | 18.1 |
| OTD | Detail Oriented | 32 | 53.1 | -21.1 |
| OTD | Computers And Information Technology | 53 | 74.5 | -21.5 |
| OTD | Mathematics | 26 | 60.2 | -34.2 |
| PAS | Detail Oriented | 97 | 45.0 | 52.0 |
| PAS | Mathematics | 79 | 53.6 | 25.4 |
| PAS | Computers And Information Technology | 93 | 69.5 | 23.5 |
| PAS | Adaptability | 100 | 77.4 | 22.6 |
| PAS | Creativity And Innovation | 39 | 66.5 | -27.5 |
| SLP | Adaptability | 99 | 77.5 | 21.5 |
| SLP | Mathematics | 39 | 58.6 | -19.6 |
| SLP | Physical Strength And Stamina | 25 | 46.4 | -21.4 |
| SLP | Leadership | 50 | 71.8 | -21.8 |
| SLP | Project Management | 19 | 68.2 | -49.2 |
| SS | Physical Strength And Stamina | 65 | 41.4 | 23.6 |
| SS | Problem Solving And Decision Making | 63 | 83.5 | -20.5 |
| SS | Detail Oriented | 27 | 53.8 | -26.8 |
| SS | Adaptability | 56 | 82.9 | -26.9 |
| SS | Project Management | 32 | 66.6 | -34.6 |
O*NET provides 35 specific skills scored on importance (1–5 scale) and level (1–7 scale). We focus on importance scores to understand what matters most for each occupation.
onet_raw <- read_excel(ONET_SKILLS_PATH, sheet = "Skills") |>
clean_names()
# Filter: our target SOCs, importance scores only, not suppressed
onet_skills <- onet_raw |>
mutate(soc_code = str_sub(o_net_soc_code, 1, 7)) |>
filter(soc_code %in% target_socs,
scale_id == "IM",
recommend_suppress != "Y" | is.na(recommend_suppress)) |>
select(soc_code, title, skill = element_name,
importance = data_value) |>
left_join(soc_crosswalk |> select(shrs_program, shrs_dept, soc_code, occupation_title), by = "soc_code")
onet_skills |>
group_by(shrs_program) |>
slice_max(importance, n = 10) |>
ggplot(aes(x = reorder(skill, importance), y = importance,
fill = shrs_program)) +
geom_col(width = 0.7) +
coord_flip() +
facet_wrap(~ shrs_program, ncol = 2, scales = "free_y") +
scale_fill_manual(values = program_colors) +
scale_y_continuous(limits = c(0, 5)) +
labs(
title = "Top 10 Most Important Skills by SHRS Program",
subtitle = "O*NET Importance Score (1–5 scale)",
x = NULL, y = "Importance"
) +
theme_minimal(base_size = 10) +
theme(legend.position = "none")
onet_skills |>
ggplot(aes(x = shrs_program, y = reorder(skill, importance), fill = importance)) +
geom_tile(color = "white", linewidth = 0.3) +
geom_text(aes(label = round(importance, 1)), size = 2.5) +
scale_fill_gradient2(low = "#d73027", mid = "#fee08b", high = "#1a9850",
midpoint = 2.5, limits = c(1, 5),
name = "Importance\n(1–5)") +
labs(
title = "O*NET Skill Importance Across SHRS Programs",
subtitle = "35 skills rated by importance (1 = not important, 5 = extremely important)",
x = NULL, y = NULL
) +
theme_minimal(base_size = 11) +
theme(axis.text.x = element_text(face = "bold", angle = 30, hjust = 1),
panel.grid = element_blank())
Group the 35 O*NET skills into functional clusters to see where each program’s strengths concentrate.
# Define skill clusters
cluster_map <- tribble(
~skill, ~cluster,
"Active Listening", "Communication",
"Speaking", "Communication",
"Writing", "Communication",
"Reading Comprehension", "Communication",
"Social Perceptiveness", "Interpersonal",
"Coordination", "Interpersonal",
"Persuasion", "Interpersonal",
"Negotiation", "Interpersonal",
"Instructing", "Interpersonal",
"Service Orientation", "Interpersonal",
"Critical Thinking", "Analytical",
"Complex Problem Solving", "Analytical",
"Judgment and Decision Making", "Analytical",
"Systems Analysis", "Analytical",
"Systems Evaluation", "Analytical",
"Operations Analysis", "Analytical",
"Active Learning", "Learning & Adaptability",
"Learning Strategies", "Learning & Adaptability",
"Monitoring", "Learning & Adaptability",
"Time Management", "Management",
"Management of Personnel Resources", "Management",
"Management of Material Resources", "Management",
"Management of Financial Resources", "Management",
"Quality Control Analysis", "Technical",
"Technology Design", "Technical",
"Equipment Selection", "Technical",
"Installation", "Technical",
"Programming", "Technical",
"Operations Monitoring", "Technical",
"Operation and Control", "Technical",
"Equipment Maintenance", "Technical",
"Troubleshooting", "Technical",
"Repairing", "Technical",
"Mathematics", "Science & Math",
"Science", "Science & Math"
)
onet_clustered <- onet_skills |>
left_join(cluster_map, by = "skill") |>
filter(!is.na(cluster)) |>
group_by(shrs_program, cluster) |>
summarise(avg_importance = mean(importance, na.rm = TRUE), .groups = "drop")
onet_clustered |>
ggplot(aes(x = cluster, y = avg_importance, fill = shrs_program)) +
geom_col(position = "dodge", width = 0.7) +
scale_fill_manual(values = program_colors) +
scale_y_continuous(limits = c(0, 5)) +
labs(
title = "Average Skill Importance by Cluster",
subtitle = "O*NET skills grouped into functional categories",
x = NULL, y = "Average Importance (1–5)", fill = "Program"
) +
theme_minimal(base_size = 12) +
theme(axis.text.x = element_text(angle = 30, hjust = 1))
onet_clustered |>
ggplot(aes(x = shrs_program, y = cluster, fill = avg_importance)) +
geom_tile(color = "white", linewidth = 0.5) +
geom_text(aes(label = round(avg_importance, 2)), size = 3.2) +
scale_fill_gradient2(low = "#d73027", mid = "#fee08b", high = "#1a9850",
midpoint = 2.5, limits = c(1, 5),
name = "Avg\nImportance") +
labs(
title = "Skill Cluster Heatmap by SHRS Program",
x = NULL, y = NULL
) +
theme_minimal(base_size = 12) +
theme(axis.text.x = element_text(face = "bold", angle = 30, hjust = 1),
panel.grid = element_blank())
What education, experience, and training does the labor market actually require for each SHRS occupation? This directly informs whether program structures are aligned with market expectations.
edu_raw <- read_excel(ONET_EDU_PATH,
sheet = "Education, Training, and Experi") |>
clean_names()
edu_raw <- edu_raw |>
mutate(soc_code = str_sub(o_net_soc_code, 1, 7)) |>
filter(soc_code %in% target_socs) |>
left_join(soc_crosswalk |> select(shrs_program, shrs_dept, soc_code, occupation_title), by = "soc_code")
What percentage of workers in each occupation hold each education level?
edu_labels <- tribble(
~category, ~edu_level,
1, "Less than HS",
2, "HS Diploma/GED",
3, "Post-secondary Certificate",
4, "Some College",
5, "Associate's Degree",
6, "Bachelor's Degree",
7, "Post-baccalaureate Certificate",
8, "Master's Degree",
9, "Post-master's Certificate",
10, "First Professional Degree",
11, "Doctoral Degree",
12, "Post-doctoral Training"
)
edu_dist <- edu_raw |>
filter(element_id == "2.D.1", scale_id == "RL") |>
mutate(category = as.integer(category),
pct = as.numeric(data_value)) |>
filter(pct > 0) |>
left_join(edu_labels, by = "category") |>
mutate(edu_level = factor(edu_level, levels = edu_labels$edu_level))
edu_dist |>
ggplot(aes(x = shrs_program, y = pct, fill = edu_level)) +
geom_col(width = 0.7) +
scale_fill_viridis_d(option = "D", direction = -1, name = "Education Level") +
labs(
title = "Education Level Distribution by SHRS Occupation",
subtitle = "Percent of workers at each education level (O*NET incumbent survey)",
x = NULL, y = "Percent of Workers (%)"
) +
theme_minimal(base_size = 12) +
theme(legend.position = "right",
axis.text.x = element_text(angle = 30, hjust = 1))
edu_dist |>
select(Program = shrs_program, `Education Level` = edu_level,
`% of Workers` = pct) |>
mutate(`% of Workers` = round(`% of Workers`, 1)) |>
kable() |>
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
| Program | Education Level | % of Workers |
|---|---|---|
| DN | Associate’s Degree | 3.3 |
| DN | Bachelor’s Degree | 10.0 |
| DN | Post-baccalaureate Certificate | 53.3 |
| DN | Master’s Degree | 33.3 |
| PAS | Bachelor’s Degree | 5.4 |
| PAS | Master’s Degree | 81.1 |
| PAS | First Professional Degree | 13.5 |
| PAS | HS Diploma/GED | 21.8 |
| PAS | Post-secondary Certificate | 16.8 |
| PAS | Associate’s Degree | 4.1 |
| PAS | Master’s Degree | 41.7 |
| PAS | Post-master’s Certificate | 1.1 |
| PAS | First Professional Degree | 14.5 |
| OTD | Bachelor’s Degree | 13.6 |
| OTD | Master’s Degree | 86.4 |
| OTD | Bachelor’s Degree | 17.4 |
| OTD | Post-baccalaureate Certificate | 21.7 |
| OTD | Master’s Degree | 56.5 |
| OTD | Post-master’s Certificate | 4.3 |
| DPT | Associate’s Degree | 6.3 |
| DPT | Bachelor’s Degree | 8.3 |
| DPT | Master’s Degree | 38.3 |
| DPT | Doctoral Degree | 47.1 |
| SLP | Master’s Degree | 88.5 |
| SLP | Post-master’s Certificate | 11.5 |
| SS | Bachelor’s Degree | 59.1 |
| SS | Master’s Degree | 31.8 |
| SS | Doctoral Degree | 9.1 |
| AuD | Doctoral Degree | 95.4 |
| AuD | Post-doctoral Training | 4.6 |
| AT | HS Diploma/GED | 8.3 |
| AT | Bachelor’s Degree | 37.4 |
| AT | Master’s Degree | 54.3 |
ojt_labels <- tribble(
~category, ~training,
1, "None",
2, "1 day to 1 month",
3, "1–3 months",
4, "3–6 months",
5, "6 months–1 year",
6, "1–2 years",
7, "2–4 years",
8, "4–10 years",
9, "Over 10 years"
)
ojt_dist <- edu_raw |>
filter(element_id == "3.A.3", scale_id == "OJ") |>
mutate(category = as.integer(category),
pct = as.numeric(data_value)) |>
filter(pct > 0) |>
left_join(ojt_labels, by = "category") |>
mutate(training = factor(training, levels = ojt_labels$training))
if (nrow(ojt_dist) > 0) {
ojt_dist |>
ggplot(aes(x = shrs_program, y = pct, fill = training)) +
geom_col(width = 0.7) +
scale_fill_viridis_d(option = "B", direction = -1, name = "OJT Duration") +
labs(
title = "On-the-Job Training Requirements by SHRS Occupation",
subtitle = "O*NET incumbent survey",
x = NULL, y = "Percent of Workers (%)"
) +
theme_minimal(base_size = 12) +
theme(legend.position = "right",
axis.text.x = element_text(angle = 30, hjust = 1))
}
Bringing together the skill profiles and education pathways into a single program-level summary.
# Dominant education level per program
dominant_edu <- edu_dist |>
group_by(shrs_program) |>
slice_max(pct, n = 1) |>
select(shrs_program, dominant_edu = edu_level, edu_pct = pct)
# Top skill cluster per program
top_cluster <- onet_clustered |>
group_by(shrs_program) |>
slice_max(avg_importance, n = 1) |>
select(shrs_program, top_cluster = cluster, cluster_importance = avg_importance)
# Top BLS skill per program
top_bls_skill <- bls_skills |>
group_by(shrs_program) |>
slice_max(percentile, n = 1) |>
select(shrs_program, top_bls_skill = skill, top_percentile = percentile)
# Average O*NET importance (overall skill intensity)
avg_intensity <- onet_skills |>
group_by(shrs_program) |>
summarise(avg_skill_importance = round(mean(importance, na.rm = TRUE), 2),
.groups = "drop")
# Combine
synthesis <- soc_crosswalk |>
select(shrs_program, shrs_dept) |>
left_join(dominant_edu, by = "shrs_program") |>
left_join(top_cluster, by = "shrs_program") |>
left_join(top_bls_skill, by = "shrs_program") |>
left_join(avg_intensity, by = "shrs_program")
synthesis |>
select(
Program = shrs_program,
Dept = shrs_dept,
`Primary Education` = dominant_edu,
`% at that Level` = edu_pct,
`Top Skill Cluster` = top_cluster,
`Top BLS Skill` = top_bls_skill,
`Skill Intensity` = avg_skill_importance
) |>
kable(digits = 1) |>
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
| Program | Dept | Primary Education | % at that Level | Top Skill Cluster | Top BLS Skill | Skill Intensity |
|---|---|---|---|---|---|---|
| SLP | CSD | Master’s Degree | 88.5 | Communication | Adaptability | 2.8 |
| AuD | CSD | Doctoral Degree | 95.4 | Communication | Customer Service | 3.0 |
| AuD | CSD | Doctoral Degree | 95.4 | Communication | Interpersonal | 3.0 |
| HIM | HIM | NA | NA | NA | Computers And Information Technology | NA |
| OTD | OT | Master’s Degree | 86.4 | Communication | Interpersonal | 2.8 |
| DPT | PT | Doctoral Degree | 47.1 | Communication | Interpersonal | 2.8 |
| PAS | PAS | Master’s Degree | 81.1 | Communication | Adaptability | 2.8 |
| AT | SMN | Master’s Degree | 54.3 | Communication | Adaptability | 2.8 |
| AT | SMN | Master’s Degree | 54.3 | Communication | Interpersonal | 2.8 |
| DN | SMN | Post-baccalaureate Certificate | 53.3 | Communication | Interpersonal | 2.9 |
| DN | SMN | Post-baccalaureate Certificate | 53.3 | Communication | Science | 2.9 |
| SS | SMN | Bachelor’s Degree | 59.1 | Communication | Interpersonal | 2.8 |
# Use skill intensity vs education level as a 2D positioning
edu_numeric <- tribble(
~dominant_edu, ~edu_years,
"Less than HS", 10,
"HS Diploma/GED", 12,
"Post-secondary Certificate", 13,
"Some College", 14,
"Associate's Degree", 14,
"Bachelor's Degree", 16,
"Post-baccalaureate Certificate", 17,
"Master's Degree", 18,
"Post-master's Certificate", 19,
"First Professional Degree", 20,
"Doctoral Degree", 21,
"Post-doctoral Training", 22
)
positioning <- synthesis |>
left_join(edu_numeric, by = "dominant_edu")
positioning |>
ggplot(aes(x = edu_years, y = avg_skill_importance,
color = shrs_program, label = shrs_program)) +
geom_point(size = 5) +
geom_text(vjust = -1.2, size = 4.5, fontface = "bold") +
scale_color_manual(values = program_colors) +
scale_x_continuous(
breaks = c(14, 16, 18, 21),
labels = c("Associate's", "Bachelor's", "Master's", "Doctoral")
) +
labs(
title = "Program Positioning: Education Level vs Skill Intensity",
subtitle = "Where each SHRS program sits in terms of training investment and skill demands",
x = "Dominant Education Level", y = "Average Skill Importance (O*NET 1–5)"
) +
theme_minimal(base_size = 13) +
theme(legend.position = "none")
With 9 programs now in scope, we can do richer cross-program comparisons to identify shared skill foundations and unique differentiators.
# Build a program × skill matrix for BLS skills
skill_matrix <- bls_skills |>
select(shrs_program, skill, percentile) |>
pivot_wider(names_from = skill, values_from = percentile)
# Compute distance and cluster
if (nrow(skill_matrix) >= 3) {
skill_dist <- dist(skill_matrix |> select(-shrs_program))
skill_clust <- hclust(skill_dist, method = "ward.D2")
# Label with program names
skill_clust$labels <- skill_matrix$shrs_program
plot(skill_clust, main = "SHRS Programs Clustered by BLS Skill Similarity",
sub = "Ward's method on 17-dimension BLS percentile vectors",
xlab = "", ylab = "Distance")
}
v2.1 Notes: